library(tidyverse)
library(dplyr)
library(janitor)
library(plotly)
library(stringr)
library(plotly)
# Read the search-results export and tidy its column names with
# janitor::clean_names() so later steps can refer to them by clean names.
data <- clean_names(read_csv("Data/SearchResultsTable.csv"))
# Build the study-timeline table: keep the columns of interest, split the
# text start/completion dates into a month piece (first three characters)
# and a year piece (last four characters), drop studies whose date is the
# literal text "null", and parse the pieces into Date columns.
data_for_plot <-
  data %>%
  select(
    nct_number, start_date, completion_date, phases, study_type,
    enrollment, interventions
  ) %>%
  mutate(
    mm_start = str_sub(start_date, 1, 3),
    yy_start = str_sub(start_date, -4, -1),
    mm_end = str_sub(completion_date, 1, 3),
    yy_end = str_sub(completion_date, -4, -1)
  ) %>%
  # "null" yields "nul" for the 3-character month and "null" for the year.
  filter(
    mm_start != "nul", mm_end != "nul",
    yy_start != "null", yy_end != "null"
  ) %>%
  # as.Date()/strptime() return NA when a month is supplied without a day,
  # so every date is anchored to the first day of its month.
  # NOTE(review): "%b" matches month abbreviations in the current locale;
  # presumably an English LC_TIME locale is required -- confirm.
  mutate(
    date_start = as.Date(
      paste(yy_start, mm_start, "01", sep = "-"),
      format = "%Y-%b-%d"
    ),
    date_end = as.Date(
      paste(yy_end, mm_end, "01", sep = "-"),
      format = "%Y-%b-%d"
    )
  )
# Scratch table used to inspect the date parsing: same column selection,
# month/year split and "null" filtering as the timeline table, plus a
# combined "<year>-<month abbreviation>" text column named `date`.
temp <-
  data %>%
  select(
    nct_number, start_date, completion_date, phases, study_type,
    enrollment, interventions
  ) %>%
  mutate(
    mm_start = str_sub(start_date, 1, 3),
    yy_start = str_sub(start_date, -4, -1),
    mm_end = str_sub(completion_date, 1, 3),
    yy_end = str_sub(completion_date, -4, -1)
  ) %>%
  filter(
    mm_start != "nul", mm_end != "nul",
    yy_start != "null", yy_end != "null"
  ) %>%
  mutate(date = paste(yy_start, mm_start, sep = "-"))

# Preview the parsed dates for (up to) the first ten rows. A day has to be
# appended because as.Date()/strptime() return NA for a month without a
# day; head() avoids NA padding when there are fewer than ten rows.
as.Date(paste0(head(temp$date, 10), "-01"), format = "%Y-%b-%d")
#Sys.setlocale("en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8")
# Bar chart of `n` per `product_name`, coloured by product, with the x-axis
# tick labels hidden.
# NOTE(review): the `data_for_plot` built above in this file has neither a
# `product_name` nor an `n` column, so this looks like a leftover from
# another analysis and presumably fails when rendered -- confirm or remove.
layout(
  plot_ly(
    data_for_plot,
    x = ~product_name,
    y = ~n,
    color = ~product_name,
    type = "bar"
  ),
  xaxis = list(showticklabels = FALSE)
)
Separate eligible enrollment age into starting and ending age:
# Convert an age value and its unit, as written in the `age` text (e.g.
# "18" and "Years"), to years. Missing or unrecognised units give NA.
age_to_years <- function(value, unit) {
  units_per_year <- c(
    year = 1,
    month = 12,
    week = 365.25 / 7,
    day = 365.25,
    hour = 365.25 * 24,
    minute = 365.25 * 24 * 60
  )
  # "Years" -> "year", "Month" -> "month", ...
  unit_key <- str_remove(str_to_lower(unit), "s$")
  as.numeric(value) / unname(units_per_year[unit_key])
}

# Extract one "<number> <unit>" age bound from `age` with `pattern` (which
# must capture the number and the unit, in that order) and return it in
# years; NA where the pattern does not match.
extract_age_years <- function(age, pattern) {
  parts <- str_match(age, pattern)
  age_to_years(parts[, 2], parts[, 3])
}

# Split the eligible-age text into numeric lower/upper bounds (in years).
# Bounds are located by pattern rather than by fixed character position, so
# one-, two- and three-digit ages and non-year units are all handled.
# Forms handled (taken from the original comments and slicing logic):
#   "<a> <unit> to <b> <unit> ..."   -> age_start = a, age_end = b
#   "<a> <unit> and older ..."       -> age_start = a, age_end = 100
#   "up to <b> <unit> ..."           -> age_start = 0, age_end = b
#   "Child, Adult, Senior"           -> age_start = 0, age_end = 100
# Anything else is left as NA, as before.
data_for_plot <-
  data %>%
  select(rank, nct_number, phases, study_type, enrollment, age) %>%
  mutate(
    lower_years = extract_age_years(age, "^(\\d+)\\s+([A-Za-z]+)"),
    upper_years = extract_age_years(age, "\\bto\\s+(\\d+)\\s+([A-Za-z]+)"),
    age_start = case_when(
      !is.na(lower_years) ~ lower_years,
      # no lower bound stated: eligibility starts at birth
      str_detect(age, "^(up to|Child)") ~ 0,
      TRUE ~ NA_real_
    ),
    n = as.numeric(enrollment),
    age_end = case_when(
      !is.na(upper_years) ~ upper_years,
      # open-ended or unbounded eligibility is capped at 100 years
      str_detect(age, "and older") ~ 100,
      str_detect(age, "^Child, Adult, ") ~ 100,
      TRUE ~ NA_real_
    )
  ) %>%
  select(-c(lower_years, upper_years)) %>%
  # order studies by their entry age for plotting
  mutate(nct_number = forcats::fct_reorder(nct_number, age_start))
Static dot plot with facets for study type. We can set study type as user input/filter in the final dashboard:
# Static dot plot of enrollment against eligible entry age, coloured by
# phase, with one facet per study type. Rows with an NA in any column are
# dropped first.
data_for_plot %>%
  na.omit() %>%
  # Possible dashboard filters:
  # filter(study_type == "Interventional") %>%
  # filter(study_type == "Observational") %>%
  ggplot(aes(x = age_start, y = n, color = phases)) +
  # One jittered, semi-transparent layer: stacking geom_point() and
  # geom_jitter() would draw every study twice.
  geom_jitter(width = 0.5, alpha = 0.3) +
  facet_wrap(~study_type) +
  labs(
    x = "eligible entry age",
    y = "enrollment"
  ) +
  theme_bw()
Interactive dot plot (without faceting, but we can implement it if needed):
# Interactive scatter of enrollment against eligible entry age, coloured by
# phase, with the study ID shown on hover. Rows with an NA in any column
# are dropped before plotting.
data_for_plot %>%
  mutate(hover_text = str_c("\nStudy ID: ", nct_number)) %>%
  na.omit() %>%
  plot_ly(
    x = ~age_start,
    y = ~n,
    color = ~phases,
    text = ~hover_text,
    type = "scatter",
    # the plotly flag is "markers"; "marker" is not a valid mode
    mode = "markers"
  )
Prepare data for line plot - convert into long format and group by nct_number:
# Reshape to long format: one row per study and age bound, with the bound
# name in `eligible_age` and its value in `years`. The result is grouped by
# study and sorted by `years` so each study's two bounds are in path order.
data_for_line_plot <-
  data_for_plot %>%
  # pivot_longer() replaces the superseded gather()
  pivot_longer(
    cols = c(age_start, age_end),
    names_to = "eligible_age",
    values_to = "years"
  ) %>%
  group_by(nct_number) %>%
  arrange(years)
Static plot with lines for eligible age:
# Static line plot: one path per study spanning its eligible age bounds,
# placed at the study's enrollment, coloured by phase and faceted by study
# type. Rows with an NA in any column are dropped first.
# Possible dashboard filters on the data before plotting:
#   filter(study_type == "Interventional")
#   filter(study_type == "Observational")
ggplot(
  data = na.omit(data_for_line_plot),
  mapping = aes(x = years, y = n, colour = phases, group = nct_number)
) +
  geom_path() +
  facet_wrap(~study_type) +
  labs(y = "enrollment", x = "eligible age") +
  theme_bw()
Interactive version of the line plot for eligible age (via ggplotly):
# Build the eligible-age line plot (one path per study, coloured by phase,
# faceted by study type) and store it in `p`, then hand it to ggplotly()
# for an interactive rendering. Rows with an NA in any column are dropped.
# Possible dashboard filters on the data before plotting:
#   filter(study_type == "Interventional")
#   filter(study_type == "Observational")
p <- ggplot(
  data = na.omit(data_for_line_plot),
  mapping = aes(x = years, y = n, colour = phases, group = nct_number)
) +
  geom_path() +
  facet_wrap(~study_type) +
  labs(y = "enrollment", x = "eligible age") +
  theme_bw()

ggplotly(p)